This is an exploratory study to find out whether there has been a change in temperature in Makkah over the last decade. The study is part of a developmental awareness series on YouTube presented by Eng. Ahmed H. Alfi'er Alshareef in Arabic.
import numpy as np
import re
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sktime.utils.plotting.forecasting import plot_ys
import warnings
warnings.filterwarnings("ignore")
(Total number of questions is 5)
===================================
Data source: www.timeanddate.com
# ---------------------------------------------------------------------------
# Data loading & cleaning (raw hourly observations from www.timeanddate.com)
# ---------------------------------------------------------------------------
makkah_weather = pd.read_csv('../data/makkah_weather.csv')
makkah_weather.head()
# Combine the date and hour columns into one datetime column, then drop hour
makkah_weather['date'] = makkah_weather.date +' '+ makkah_weather.hour
makkah_weather['date'] = pd.to_datetime(makkah_weather['date'], dayfirst=True, errors='coerce')
makkah_weather.drop('hour', axis=1, inplace=True)
makkah_weather.head()
# Keep only the numeric part of visibility; '�' is a mis-encoded character in
# the raw file (presumably a unit marker that lost its encoding — TODO confirm)
makkah_weather.visibility = makkah_weather.visibility.str.split('�').str[0]
makkah_weather.head()
makkah_weather.info()
# Inspect distinct raw values before converting the columns to numeric
print(makkah_weather.visibility.unique())
print(makkah_weather.wind.unique())
print(makkah_weather.temperature.unique())
# 'No wind' is textual; map it to 0 so the wind column can become numeric
makkah_weather = makkah_weather.replace({'wind':'No wind'},0)
# Change data type for columns
convert_dict = {'visibility':float, 'wind':float, 'temperature':float}
makkah_weather = makkah_weather.astype(convert_dict)
makkah_weather.info()
print(makkah_weather.isna().sum())
# Fill nan values by averaging the before & after values for temperature, wind, humidity, visibility and barometer
# NOTE(review): (ffill()+bfill())/2 interpolates interior gaps but still leaves
# NaN at the very start/end of a series, where one neighbour is missing.
makkah_weather.temperature = (makkah_weather.temperature.ffill()+makkah_weather.temperature.bfill())/2
makkah_weather.wind = (makkah_weather.wind.ffill()+makkah_weather.wind.bfill())/2
makkah_weather.humidity = (makkah_weather.humidity.ffill()+makkah_weather.humidity.bfill())/2
makkah_weather.barometer = (makkah_weather.barometer.ffill()+makkah_weather.barometer.bfill())/2
makkah_weather.visibility = (makkah_weather.visibility.ffill()+makkah_weather.visibility.bfill())/2
makkah_weather.isna().sum()
makkah_weather.groupby('condition')['condition'].count()
# A stray '"' value in the condition column is re-labelled as 'Clear'
makkah_weather = makkah_weather.replace({'condition':'"'},'Clear')
print(makkah_weather.groupby('condition')['condition'].count())
makkah_weather.info()
# Daily max , min & mean temp.
d_temp = makkah_weather.groupby([makkah_weather['date'].dt.date])['temperature'].agg(['min','max','mean'])
# Monthly max , min & mean temp.
m_temp = makkah_weather.groupby([makkah_weather['date'].dt.year, makkah_weather['date'].dt.month
])['temperature'].agg(['min','max','mean'])
# yearly max , min & mean temp.
y_temp = makkah_weather.groupby(makkah_weather['date'].dt.year)['temperature'].agg(['min','max','mean'])
# Credit: https://stackoverflow.com/questions/43214978/seaborn-barplot-displaying-values
def show_values_on_bars(axs, h_v="v", space=0.4):
    """Annotate each bar of one or more axes with its value (2 decimals).

    Parameters
    ----------
    axs : a single Axes-like object or an ndarray of them.
    h_v : "v" for vertical bars (label above/below the bar top),
          "h" for horizontal bars (label to the right/left of the bar end).
    space : gap between the end of the bar and the label.
    """
    def _annotate(ax):
        if h_v == "v":
            for bar in ax.patches:
                height = bar.get_height()
                if ~np.isnan(height):
                    label = float('{:.2f}'.format(height))
                    x_pos = bar.get_x() + bar.get_width() / 2
                    # place above positive bars, below negative ones
                    if label >= 0:
                        y_pos = bar.get_y() + height + float(space)
                    else:
                        y_pos = bar.get_y() + height - float(space)
                    ax.text(x_pos, y_pos, label, ha="center")
        elif h_v == "h":
            for bar in ax.patches:
                width = bar.get_width()
                if ~np.isnan(width):
                    label = float('{:.2f}'.format(width))
                    y_pos = bar.get_y() + bar.get_height()
                    # place right of positive bars, left of negative ones
                    if label >= 0:
                        x_pos = bar.get_x() + width + float(space)
                    else:
                        x_pos = bar.get_x() + width - float(space)
                    ax.text(x_pos, y_pos, label, ha="left")

    if isinstance(axs, np.ndarray):
        for _, single_ax in np.ndenumerate(axs):
            _annotate(single_ax)
    else:
        _annotate(axs)
# Global matplotlib font sizes applied to all subsequent figures
XSMALL_SIZE = 12
SMALL_SIZE = 14
MEDIUM_SIZE = 16
BIGGER_SIZE = 10
plt.rc('font', size=SMALL_SIZE) # controls default text sizes
plt.rc('axes', titlesize=MEDIUM_SIZE) # fontsize of the axes title
plt.rc('axes', labelsize=SMALL_SIZE) # fontsize of the x and y labels
plt.rc('xtick', labelsize=XSMALL_SIZE) # fontsize of the tick labels
plt.rc('ytick', labelsize=XSMALL_SIZE) # fontsize of the tick labels
plt.rc('legend', fontsize=XSMALL_SIZE) # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title
# Pairwise relationships between all numeric columns
sns.pairplot(makkah_weather);
# Full hourly temperature series
plot_ys(makkah_weather.temperature);
plt.title('$Makkah$ Tempreture from 2009-2020', fontsize=16);
plt.ylabel('$Tempreture$', fontsize=14);
# Daily temp. changes
plot_ys(d_temp.reset_index()['max'], d_temp.reset_index()['min'], d_temp.reset_index()['mean'], labels=['max', 'min','ave']);
plt.title('$Makkah$ Daily Max , Min & Ave Tempretures from 2009-2020', fontsize=16);
plt.ylabel('$Tempreture$', fontsize=14);
# Monthly temp. changes
plot_ys(m_temp.reset_index(drop=True)['max'], m_temp.reset_index(drop=True)['min'], m_temp.reset_index(drop=True)['mean'], labels=['max', 'min','ave']);
plt.title('$Makkah$ Monthly Tempretures from 2009-2020', fontsize=16);
plt.ylabel('$Tempreture$', fontsize=14);
# Yearly temp. changes
plot_ys(y_temp.reset_index(drop=True)['max'], y_temp.reset_index(drop=True)['min'], y_temp.reset_index(drop=True)['mean'] ,labels=['max', 'min','ave']);
plt.title('$Makkah$ Yearly Tempretures from 2009-2020', fontsize=16);
plt.ylabel('$Tempreture$', fontsize=14);
# Yearly average temperature as a bar chart
plt.figure(figsize=(16,6))
sns.barplot(x=y_temp.index, y=y_temp.reset_index(drop=True)['mean'],palette='summer');
plt.title('$Makkah$ Yearly Average Tempretures from 2009-2020', fontsize=16);
plt.ylabel('$Tempreture$', fontsize=14);
plt.xticks(rotation=90);
# (year, month)-indexed min/max/mean aggregates of the numeric variables;
# consumed later by get_mean()
mkh_df = makkah_weather.drop(['visibility',
'city',
'condition'],
axis=1).groupby([makkah_weather.date.dt.year,
makkah_weather.date.dt.month],
as_index=False).agg(['min','max','mean'])
print('Maximum Recorded Temperature from 2009 -2020:')
print(mkh_df['temperature']['max'][mkh_df['temperature']['max'] == mkh_df['temperature']['max'].max()])
print('Minimum Recorded Temperature from 2009 -2020:')
print(mkh_df['temperature']['min'][mkh_df['temperature']['min'] == mkh_df['temperature']['min'].min()])
def get_mean(month, typ):
    """For one calendar month, draw three figures of the yearly mean of `typ`
    (temperature / humidity / wind / barometer): raw bars, mean-removed bars
    (anomaly view), and a linear trend fit across the years.

    Relies on the module-level `mkh_df` (year, month)-indexed aggregate.
    month : int in 1..12.
    typ   : one of the aggregated column names in `mkh_df`.
    """
    # f1 = mkh_df.index.get_level_values(0) == 2020
    # Select all rows whose month index level matches `month`
    f2 = mkh_df.index.get_level_values(1) == month
    c = mkh_df.iloc[(f2)]
    look_up = {'1': 'January', '2': 'February ', '3': 'March', '4': 'April ', '5': 'May',
               '6': 'June', '7': 'July', '8': 'August ', '9': 'September',
               '10': 'October ', '11': 'November ', '12': 'December'}
    nam = look_up[str(month)]
    # d = pd.melt(c['temperature'], value_vars=['mean'])
    # d = d.rename(columns={'value': 'Temperature'})
    # Expose the year index level as a plain column for seaborn
    c['year'] = np.tile(c.index.get_level_values(0), 1)
    plt.figure(figsize=(16,6));
    aa = sns.barplot(data=c, x='year',y=c[typ]['mean'].values, palette='summer_r');
    sns.set_style("white");
    plt.title('$Average$ {} for {} from 2009-2020'.format(typ,nam));
    plt.ylabel(typ);
    # Per-variable headroom and annotation spacing
    if typ == 'temperature':
        plt.ylim(0, max(c[typ]['mean'].values)+4);
        show_values_on_bars(aa, h_v="v", space=1)
    elif typ == 'humidity':
        plt.ylim(0, max(c[typ]['mean'].values)+1);
        show_values_on_bars(aa, h_v="v", space=0.2)
    elif typ == 'barometer':
        plt.ylim(0, max(c[typ]['mean'].values)+110);
        show_values_on_bars(aa, h_v="v", space=10)
    else:
        plt.ylim(0, max(c[typ]['mean'].values)+2);
        show_values_on_bars(aa, h_v="v", space=1)
    # Second figure: same bars after removing the across-years mean
    plt.figure(figsize=(16,6));
    dum = c[typ]['mean'].values - c[typ]['mean'].values.mean()
    aa = sns.barplot(data=c, x='year',y= dum, palette='summer_r');
    plt.title('$Average$ {} After Removing the Mean for {} from 2009-2020'.format(typ,nam));
    if typ == 'temperature':
        plt.ylim(min(dum)-1, max(dum)+1);
        show_values_on_bars(aa, h_v="v", space=0.3)
    elif typ == 'humidity':
        plt.ylim(min(dum)-0.5, max(dum)+0.5);
        show_values_on_bars(aa, h_v="v", space=0.1)
    else:
        plt.ylim(min(dum)-1, max(dum)+1);
        show_values_on_bars(aa, h_v="v", space=0.3)
    # Third figure: linear trend of the monthly mean across years
    plt.figure(figsize=(16,6));
    plt.title('Average {} $Trend$ for {} from 2009-2020'.format(typ,nam));
    sns.regplot(data=c, x='year',y=c[typ]['mean'].values,fit_reg=True,color='gray',line_kws={'linestyle':'--'},
                ci=None,scatter_kws={'s':180, 'color':'#00943e'});
    plt.ylabel(typ);
# Draw the three per-month diagnostic figures for every variable
for m in range(1,13):
    get_mean(m, 'temperature')
for m in range(1,13):
    get_mean(m, 'humidity')
for m in range(1,13):
    get_mean(m, 'wind')
for m in range(1,13):
    get_mean(m, 'barometer')
# Overall distributions of each variable across the whole period
plt.figure(figsize=(16,6));
sns.distplot(makkah_weather.temperature);
plt.title('Makkah $Temperature$ Distribution from 2009-2020', fontsize=16);
plt.figure(figsize=(16,6));
sns.distplot(makkah_weather.humidity);
plt.title('Makkah $Humidity$ Distribution from 2009-2020', fontsize=16);
plt.figure(figsize=(16,6));
sns.distplot(makkah_weather.barometer);
plt.title('Makkah $Barometer$ Distribution from 2009-2020', fontsize=16);
plt.figure(figsize=(16,6));
sns.distplot(makkah_weather.wind);
plt.title('Makkah $Wind$ Distribution from 2009-2020', fontsize=16);
# Frequency count and share of each weather condition
cond = pd.DataFrame(makkah_weather.groupby('condition')['condition'].count())
# print(makkah_weather.groupby('condition')['condition'].count().sum())
cond['ratio'] = makkah_weather.groupby('condition')['condition'].count()/makkah_weather.groupby('condition')['condition'].count().sum()
plt.figure(figsize=(16,6))
cond = cond.sort_values(by='ratio', ascending=False);
aa = sns.barplot(x=cond.index, y=cond.ratio*100, palette='summer');
show_values_on_bars(aa, h_v="v", space=1)
plt.xticks(rotation=90);
plt.title('Makkah $Condition$ Count in % from 2009 to 2020', fontsize=16);
plt.ylim(0,43);
print('Weather condition count totally:\n')
print(cond.condition.sort_values(ascending=False))
# Keep only conditions observed more than 15000 times for the pie chart
ig = cond[cond.condition > 15000]
tg = cond.loc[ig.index]
# Pie chart
labels = ig.index
fig1, ax1 = plt.subplots(figsize=(16,6));
ax1.pie(tg.ratio, labels=labels, autopct='%1.1f%%', startangle=90, explode=[0.01,0.01,0.01]);
#draw circle (donut-style hole in the middle)
centre_circle = plt.Circle((0,0),0.30,fc='white');
fig = plt.gcf();
fig.gca().add_artist(centre_circle);
# Equal aspect ratio ensures that pie is drawn as a circle
ax1.axis('equal');
plt.tight_layout();
plt.title('Makkah $Condition$ Count in % from 2009 to 2020', fontsize=16);
plt.show();
makkah_weather.head()
# Persist the cleaned dataset for the forecasting phase below
makkah_weather.to_csv('../data/cleand.csv', index=False)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
# pd.set_option('display.max_rows', None)
# ================================================
from datetime import datetime
from datetime import timedelta
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
from time import time
import tensorflow as tf
from tensorflow import keras
from sklearn.utils import shuffle
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import VarianceThreshold, SelectFromModel, RFE
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, make_scorer
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor ,RandomForestRegressor
from sklearn.linear_model import LinearRegression, ElasticNet
from sktime.forecasting.model_selection import SlidingWindowSplitter, ForecastingGridSearchCV
from sktime.forecasting.compose import EnsembleForecaster
from sktime.forecasting.arima import AutoARIMA
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.transformers.single_series.detrend import Deseasonalizer, Detrender
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.compose import ReducedRegressionForecaster
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.performance_metrics.forecasting import smape_loss
from sktime.utils.plotting.forecasting import plot_ys
from sktime.transformers.single_series.detrend import ConditionalDeseasonalizer
from sktime.regression.compose import TimeSeriesForestRegressor
# Stationarity (unit-root) test.
# NOTE: the original header called this a "seasonality test" — the Augmented
# Dickey-Fuller test actually checks for a unit root: p-value < 0.05 means we
# reject the unit-root null, i.e. the series is stationary.
def perform_adf_test(series):
    """Run the Augmented Dickey-Fuller test on `series` and print the outcome.

    Parameters
    ----------
    series : array-like
        The time series to test.

    Returns
    -------
    float
        The test's p-value (< 0.05 => series is stationary). Returning it is
        backward-compatible: existing callers ignore the return value.
    """
    result = adfuller(series)
    print('ADF Statistic: %f' % result[0])
    print('p-value: %f' % result[1])
    return result[1]
# Reload the cleaned dataset produced by the exploration phase
makkah_weather = pd.read_csv('../data/cleand.csv', parse_dates=['date'])
makkah_weather.head()
# Limit the data to start from 2016
s = pd.Timestamp(year=2016, month=1, day=1)
makkah_weather = makkah_weather[makkah_weather.date >= s].reset_index(drop=True)
makkah_weather.head()
# Daily max , min & mean data
d_temp = makkah_weather.groupby([makkah_weather['date'].dt.date]).agg(['min','max','mean'])
# Monthly max , min & mean data
m_temp = makkah_weather.groupby([makkah_weather['date'].dt.year, makkah_weather['date'].dt.month
]).agg(['min','max','mean'])
# yearly max , min & mean data
y_temp = makkah_weather.groupby(makkah_weather['date'].dt.year).agg(['min','max','mean'])
# Daily max and min temperature
dmax = d_temp['temperature']['max'].reset_index(drop=True)
dmin = d_temp['temperature']['min'].reset_index(drop=True)
# Monthly max and min temperature (original comment wrongly said "yearly")
mmax = m_temp['temperature']['max'].reset_index(drop=True)
mmin = m_temp['temperature']['min'].reset_index(drop=True)
# yearly max and min temperature
ymax = y_temp['temperature']['max'].reset_index(drop=True)
ymin = y_temp['temperature']['min'].reset_index(drop=True)
# Minimum temperatures: remove the yearly (sp=365 daily samples) seasonality
t = Deseasonalizer(model='multiplicative', sp=365)
y_dmin = t.fit_transform(dmin)
plot_ys(dmin, y_dmin,labels=['Actual','Deseasonalized']);
plt.title('Minmium Tempretures 2009 - 2020');
print('Sesonality test for Min temp.')
perform_adf_test(y_dmin)
# Maximum temperatures: the same transformer is re-fit on the max series
y_dmax = t.fit_transform(dmax)
plot_ys(dmax, y_dmax,labels=['Actual','Deseasonalized']);
plt.title('Maximum Tempretures 2009 - 2020');
print('Sesonality test for Max temp.')
perform_adf_test(y_dmax)
# =====================  Minimum-temperature models  =====================
# Hold out the last 30 days of the deseasonalized min series for testing
y_train, y_test = temporal_train_test_split(y_dmin, test_size=30)
print(y_train.shape, y_test.shape)
# Forecasting horizon: 1..len(y_test) steps ahead
fh = np.arange(len(y_test)) + 1
# --- Model 1: AutoARIMA ---
forecaster = AutoARIMA(sp=12, suppress_warnings=True)
# NOTE(review): y_train already comes from the deseasonalized y_dmin; calling
# t.fit_transform on it again deseasonalizes twice — confirm this is intended.
forecaster.fit(t.fit_transform(y_train))
y_pred = forecaster.predict(fh)
plot_ys(y_test, y_pred, labels=[ "y_test", "y_pred"]);
print('SMAPE',round(smape_loss(y_test, y_pred),5))
print('RMSE', round(np.sqrt(mean_squared_error(y_test, y_pred)),5))
pd.DataFrame({'test':round(y_test) ,'pred':round(y_pred)})
# --- Model 2: k-NN regression reduced to a recursive forecaster ---
regressor = KNeighborsRegressor(n_neighbors=5)
forecaster = ReducedRegressionForecaster(regressor=regressor, window_length=12, strategy="recursive")
forecaster.fit(t.fit_transform(y_train))
y_pred = forecaster.predict(fh)
plot_ys(y_test, y_pred, labels=[ "y_test", "y_pred"]);
print('SMAPE',round(smape_loss(y_test, y_pred),5))
print('RMSE', round(np.sqrt(mean_squared_error(y_test, y_pred)),5))
pd.DataFrame({'test':round(y_test) ,'pred':round(y_pred)})
# --- Model 3: exponential smoothing, tuned by temporal grid search ---
forecaster = ExponentialSmoothing(seasonal='multiplicative', sp=10)
param_grid = {'seasonal':['add', 'multiplicative' ],'sp':[30,60,90,120,150,180,
210,240,270,300,330,360]}
# we fit the forecaster on the initial window, and then use temporal cross-validation to find the optimal parameter
cv = SlidingWindowSplitter(initial_window=int(len(y_train) * 0.5))
gscv = ForecastingGridSearchCV(forecaster, cv=cv, param_grid=param_grid)
gscv.fit(y_train)
y_pred = gscv.predict(fh)
plot_ys(y_test, y_pred, labels=["y_test", "y_pred"]);
print(gscv.best_params_)
# Refit the winning exponential-smoothing configuration on the train split
forecaster = gscv.best_forecaster_
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)
plot_ys( y_test, y_pred, labels=["y_test", "y_pred"]);
print('SMAPE',round(smape_loss(y_test, y_pred),5))
print('RMSE', round(np.sqrt(mean_squared_error(y_test, y_pred)),5))
pd.DataFrame({'test':round(y_test) ,'pred':round(y_pred)})
# --- Model 4: reduced regression with regressor & window grid-searched ---
li = [LinearRegression(), KNeighborsRegressor()]
forecaster = ReducedRegressionForecaster(regressor=None, window_length=15, strategy="recursive")
param_grid = {'window_length':[4,5,6,7,8,9,10,12,15,20], 'regressor':li}
# we fit the forecaster on the initial window, and then use temporal cross-validation to find the optimal parameter
cv = SlidingWindowSplitter(initial_window=int(len(y_train) * 0.5))
gscv = ForecastingGridSearchCV(forecaster, cv=cv, param_grid=param_grid)
gscv.fit(y_train)
y_pred = gscv.predict(fh)
plot_ys(y_test, y_pred, labels=["y_test", "y_pred"]);
print(gscv.best_params_)
# Keep the winning min-temperature forecaster (FMiT)
forecaster_min = gscv.best_forecaster_
forecaster_min.fit(y_train)
y_pred = forecaster_min.predict(fh)
plot_ys( y_test, y_pred, labels=["y_test", "y_pred"]);
print('SMAPE',round(smape_loss(y_test, y_pred),5))
print('RMSE', round(np.sqrt(mean_squared_error(y_test, y_pred)),5))
pd.DataFrame({'test':round(y_test) ,'pred':round(y_pred)})
# --- Model 5 prep: standardize the deseasonalized min series for the LSTM ---
sts = StandardScaler()
y = np.array(y_dmin).reshape(-1,1)
yn = sts.fit_transform(y)
def sq(seq, step):
    """Build supervised-learning pairs from a series via a sliding window.

    Each sample is `step` consecutive values of `seq`; its target is the
    value immediately following that window. Returns (X, y) as numpy arrays.
    """
    n_samples = len(seq) - step
    windows = [seq[i:i + step] for i in range(n_samples)]
    targets = [seq[i + step] for i in range(n_samples)]
    return np.array(windows), np.array(targets)
# Build supervised samples (window = 10 steps) from the scaled min series
Xs, ys = sq(yn, 10)
# LSTM expects (samples, timesteps, features)
Xs = Xs.reshape((Xs.shape[0], Xs.shape[1], 1))
X_train, X_test = temporal_train_test_split(np.array(Xs), test_size=0.2)
y_train, y_test = temporal_train_test_split(np.array(ys), test_size=0.2)
print(X_train.shape, y_train.shape)
# Bidirectional LSTM regressor for one-step-ahead prediction
model = keras.Sequential()
model.add(
    keras.layers.Bidirectional(
        keras.layers.LSTM(
            activation = 'relu',
            units=64,
            # NOTE(review): Keras input_shape is normally (timesteps, features),
            # i.e. X_train.shape[1:], not the full 3-D shape — confirm intent.
            input_shape= X_train.shape
        )
    )
)
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.Dense(units=1))
opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss='mean_squared_error', optimizer=opt)
# shuffle=False preserves the temporal order of the training windows
history = model.fit(X_train,
                    y_train,
                    epochs=50,
                    batch_size=32,
                    validation_split=0.1,
                    shuffle=False,
                    verbose = 0)
# Training vs validation loss curves
plt.figure(figsize=(16,6))
plt.plot(history.history['loss'], label='train');
plt.plot(history.history['val_loss'], label='validation');
plt.legend()
plt.grid()
# Predict the held-out windows and undo the standardization
y_pred = model.predict(X_test);
y_pred = sts.inverse_transform(y_pred)
y_test = sts.inverse_transform(y_test)
y_pred = pd.Series(y_pred.flatten(order='F'))
y_test = pd.Series(y_test.flatten(order='F'))
# NOTE(review): the next two lines re-wrap Series in Series — redundant no-ops.
y_pred = pd.Series(y_pred)
y_test = pd.Series(y_test)
plot_ys(pd.Series(y_test), y_pred, labels=['true','pred']);
print('SMAPE',round(smape_loss(y_test, y_pred),5))
print('RMSE', round(np.sqrt(mean_squared_error(y_test, y_pred)),5))
pd.DataFrame({'test':round(y_test) ,'pred':round(y_pred)})
# =====================  Maximum-temperature models  =====================
# Hold out the last 30 days of the deseasonalized max series for testing
y_train, y_test = temporal_train_test_split(y_dmax, test_size=30)
print(y_train.shape, y_test.shape)
# Forecasting horizon: 1..len(y_test) steps ahead
fh = np.arange(len(y_test)) + 1
# --- Model 1: AutoARIMA ---
forecaster = AutoARIMA(sp=12, suppress_warnings=True)
# NOTE(review): y_train already comes from the deseasonalized y_dmax; calling
# t.fit_transform on it again deseasonalizes twice — confirm this is intended.
forecaster.fit(t.fit_transform(y_train))
y_pred = forecaster.predict(fh)
plot_ys(y_test, y_pred, labels=[ "y_test", "y_pred"]);
print('SMAPE',round(smape_loss(y_test, y_pred),5))
print('RMSE', round(np.sqrt(mean_squared_error(y_test, y_pred)),5))
pd.DataFrame({'test':round(y_test) ,'pred':round(y_pred)})
# --- Model 2: k-NN regression reduced to a recursive forecaster ---
regressor = KNeighborsRegressor(n_neighbors=5)
forecaster = ReducedRegressionForecaster(regressor=regressor, window_length=12, strategy="recursive")
forecaster.fit(t.fit_transform(y_train))
y_pred = forecaster.predict(fh)
plot_ys(y_test, y_pred, labels=[ "y_test", "y_pred"]);
print('SMAPE',round(smape_loss(y_test, y_pred),5))
print('RMSE', round(np.sqrt(mean_squared_error(y_test, y_pred)),5))
pd.DataFrame({'test':round(y_test) ,'pred':round(y_pred)})
# --- Model 3: exponential smoothing, tuned by temporal grid search ---
forecaster = ExponentialSmoothing(seasonal='multiplicative', sp=10)
param_grid = {'seasonal':['add', 'multiplicative' ],'sp':[30,60,90,120,150,180,
210,240,270,300,330,360]}
# we fit the forecaster on the initial window, and then use temporal cross-validation to find the optimal parameter
cv = SlidingWindowSplitter(initial_window=int(len(y_train) * 0.5))
gscv = ForecastingGridSearchCV(forecaster, cv=cv, param_grid=param_grid)
gscv.fit(y_train)
y_pred = gscv.predict(fh)
plot_ys(y_test, y_pred, labels=["y_test", "y_pred"]);
print(gscv.best_params_)
# Keep the winning max-temperature forecaster (FMaT)
forecaster_max = gscv.best_forecaster_
forecaster_max.fit(y_train)
y_pred = forecaster_max.predict(fh)
plot_ys( y_test, y_pred, labels=["y_test", "y_pred"]);
print('SMAPE',round(smape_loss(y_test, y_pred),5))
print('RMSE', round(np.sqrt(mean_squared_error(y_test, y_pred)),5))
pd.DataFrame({'test':round(y_test) ,'pred':round(y_pred)})
# --- Model 4: reduced regression with regressor & window grid-searched ---
li = [LinearRegression(), KNeighborsRegressor()]
forecaster = ReducedRegressionForecaster(regressor=None, window_length=15, strategy="recursive")
param_grid = {'window_length':[4,5,6,7,8,9,10,12,15,20], 'regressor':li}
# we fit the forecaster on the initial window, and then use temporal cross-validation to find the optimal parameter
cv = SlidingWindowSplitter(initial_window=int(len(y_train) * 0.5))
gscv = ForecastingGridSearchCV(forecaster, cv=cv, param_grid=param_grid)
gscv.fit(y_train)
y_pred = gscv.predict(fh)
plot_ys(y_test, y_pred, labels=["y_test", "y_pred"]);
print(gscv.best_params_)
# Refit the winning reduced-regression configuration on the train split
forecaster = gscv.best_forecaster_
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)
plot_ys( y_test, y_pred, labels=["y_test", "y_pred"]);
print('SMAPE',round(smape_loss(y_test, y_pred),5))
print('RMSE', round(np.sqrt(mean_squared_error(y_test, y_pred)),5))
pd.DataFrame({'test':round(y_test) ,'pred':round(y_pred)})
# Standardize the deseasonalized max series for the LSTM model below
stm = StandardScaler()
y = np.array(y_dmax).reshape(-1,1)
yn = stm.fit_transform(y)
def sq(seq, step):
    """Slice `seq` into sliding windows of length `step` with the next value
    as the target (duplicate of the helper defined earlier in this file).

    Returns (X, y) as numpy arrays; empty arrays when `seq` is too short.
    """
    features, labels = [], []
    start = 0
    # a window is valid while a target value exists just past its end
    while start + step <= len(seq) - 1:
        features.append(seq[start:start + step])
        labels.append(seq[start + step])
        start += 1
    return np.array(features), np.array(labels)
# Build supervised samples (window = 10 steps) from the scaled max series
Xs, ys = sq(yn, 10)
# LSTM expects (samples, timesteps, features)
Xs = Xs.reshape((Xs.shape[0], Xs.shape[1], 1))
X_train, X_test = temporal_train_test_split(np.array(Xs), test_size=0.2)
y_train, y_test = temporal_train_test_split(np.array(ys), test_size=0.2)
print(X_train.shape, y_train.shape)
# Bidirectional LSTM regressor for one-step-ahead prediction
model = keras.Sequential()
model.add(
    keras.layers.Bidirectional(
        keras.layers.LSTM(
            activation = 'relu',
            units=64,
            # NOTE(review): Keras input_shape is normally (timesteps, features),
            # i.e. X_train.shape[1:], not the full 3-D shape — confirm intent.
            input_shape= X_train.shape
        )
    )
)
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.Dense(units=1))
opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss='mean_squared_error', optimizer=opt)
# shuffle=False preserves the temporal order of the training windows
history = model.fit(X_train,
                    y_train,
                    epochs=50,
                    batch_size=32,
                    validation_split=0.1,
                    shuffle=False,
                    verbose=0)
# Training vs validation loss curves
plt.figure(figsize=(16,6))
plt.plot(history.history['loss'], label='train');
plt.plot(history.history['val_loss'], label='validation');
plt.legend()
plt.grid()
# Predict the held-out windows and undo the standardization
y_pred = model.predict(X_test);
y_pred = stm.inverse_transform(y_pred)
y_test = stm.inverse_transform(y_test)
y_pred = pd.Series(y_pred.flatten(order='F'))
y_test = pd.Series(y_test.flatten(order='F'))
# NOTE(review): the next two lines re-wrap Series in Series — redundant no-ops.
y_pred = pd.Series(y_pred)
y_test = pd.Series(y_test)
plot_ys(pd.Series(y_test), y_pred, labels=['true','pred']);
print('SMAPE',round(smape_loss(y_test, y_pred),5))
print('RMSE', round(np.sqrt(mean_squared_error(y_test, y_pred)),5))
pd.DataFrame({'test':round(y_test) ,'pred':round(y_pred)})
To simplify the task of forecasting the temperatures, two models were trained: one for forecasting minimum temperatures (FMiT) and the other for maximum temperatures (FMaT). Several models were tested, including time-series techniques and a deep learning LSTM model. The FMiT performed better than the FMaT, with errors of ±1.3 degrees and ±2.14 degrees respectively (within one standard deviation). The selected model for FMiT is ReducedRegressionForecaster using linear regression, which basically reduces the forecasting task to the simpler, related task of tabular regression. The selected model for FMaT is the exponential smoothing technique, which simply smooths time-series data using an exponential window function. An important note about model selection for FMaT is that although the LSTM model is sometimes better by 0.4 degrees, it is unstable across training runs because of the random initialization of the weights.
1. The average temperatures for the same months have a downward trend for January, February, March and April (0.25 - 1 °C)
2. The average temperatures for the same months have an upward trend for May, June, July, August, October, November and December (0.25 - 1.75 °C)